{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3e3f374a",
   "metadata": {},
   "source": [
    "This notebook computes various quality metrics for the German-English translation dataset annotated with error types (https://github.com/deep-spin/hallucinations-in-nmt). \n",
    "\n",
    "Before running it, make sure that you have installed the requirements and downloaded the translation model and the LASER2 sentence encoder (see `../README.md`).\n",
    "\n",
    "The results are stored in `../computed_data/detection_metrics.tsv` and later analyzed in the notebook `02_Detection_analysis.ipynb`."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aa5262f6",
   "metadata": {},
   "source": [
    "# Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dd5bfe1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to the translation model and its vocabulary, in order to compute ALTI correctly. \n",
    "MODEL_DIR = '../model'\n",
    "# data directory that is passed to the model loader as `data_name_or_path`\n",
    "DATA_DIR = '../model/wmt18_de-en'\n",
    "# presumably the location of the downloaded LASER2 sentence encoder (see ../README.md) - TODO confirm\n",
    "LASER_DIR = '../laser'\n",
    "# if True, the translation model is moved to the GPU with `hub.cuda()`\n",
    "USE_GPU = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cf323269",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# cuBLAS workspace configuration; this is for comet to behave.\n",
    "# The variable is set exactly once, before torch is imported in the next cell.\n",
    "os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "60466e1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "0f80f40e",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.use_deterministic_algorithms(False) # otherwise, comet complains\n",
    "#!pip install unbabel-comet==1.1.2 --use-feature=2020-resolver\n",
    "import comet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "d5d4d6a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from stopes.eval.alti.wrappers.transformer_wrapper import FairseqTransformerHub\n",
    "from stopes.eval.alti.alti_metrics.alti_metrics_utils import compute_alti_metrics, compute_alti_nllb, get_loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "ea78bfb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from stopes.modules.preprocess.laser_sentence_encoder import SentenceEncoder, spm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "0961ef43",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "from transformers import AutoModelForSequenceClassification, AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7fbc7943",
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "\n",
    "def cleanup():\n",
    "    \"\"\"Run the garbage collector and, if CUDA is available, empty the CUDA cache.\"\"\"\n",
    "    gc.collect()\n",
    "    if not torch.cuda.is_available():\n",
    "        return\n",
    "    torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9d8dd374",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm.auto import tqdm, trange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "34499932",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "59e1a59b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu import CHRF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "dd8fcf5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_auc_score\n",
    "from scipy.stats import spearmanr"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a26cee22",
   "metadata": {},
   "source": [
    "# Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3f99a9ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "gt = pd.read_csv('../annotated_data/guerreiro2022_corpus_w_annotations.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "afe9e5cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derived labels: the complement of `correctness`, and row-wise maxima over groups of error columns.\n",
    "gt['any_mistake'] = 1 - gt['correctness']\n",
    "gt['any_detached'] = gt[['strong-unsupport', 'full-unsupport']].max(axis=1)\n",
    "gt['repeat_or_detached'] = gt[['repetitions', 'strong-unsupport', 'full-unsupport']].max(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e32c091",
   "metadata": {},
   "source": [
    "Compute the most severe error for each translation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "80efea56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Error types ordered from the first one checked to the last one checked.\n",
    "error_rank = [\n",
    "    'full-unsupport',\n",
    "    'strong-unsupport',\n",
    "    'repetitions',\n",
    "    'omission',\n",
    "    'named-entities',\n",
    "]\n",
    "# All labels that `get_most_important_error` can return; the two fallbacks come last.\n",
    "error_classes = error_rank + ['other_error', 'correct']\n",
    "\n",
    "def get_most_important_error(row):\n",
    "    \"\"\"Return the label of the most severe error annotated in `row`.\n",
    "\n",
    "    The columns listed in `error_rank` are checked in order and the first one with a\n",
    "    truthy value is returned. If none of them is set, the last entry of `error_classes`\n",
    "    is returned when `row['correctness']` is truthy, and the second-to-last entry otherwise.\n",
    "    \"\"\"\n",
    "    first_flagged = next((label for label in error_rank if row[label]), None)\n",
    "    if first_flagged is not None:\n",
    "        return first_flagged\n",
    "    fallback_index = -1 if row['correctness'] else -2\n",
    "    return error_classes[fallback_index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "1d83c81d",
   "metadata": {},
   "outputs": [],
   "source": [
    "gt['error_class'] = gt.apply(get_most_important_error, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55c9c149",
   "metadata": {},
   "source": [
    "# Compute metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e21512f1",
   "metadata": {},
   "source": [
    "### ALTI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c040cda7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:25:51 | INFO | fairseq.file_utils | loading archive file ../model\n",
      "2022-12-19 07:25:51 | INFO | fairseq.checkpoint_utils | load_model_ensemble_and_task is_moe=False\n",
      "2022-12-19 07:25:54 | INFO | fairseq.checkpoint_utils | Rank 0: Done reading from disk\n",
      "2022-12-19 07:25:55 | INFO | fairseq.tasks.translation | [de] dictionary: 32032 types\n",
      "2022-12-19 07:25:55 | INFO | fairseq.tasks.translation | [en] dictionary: 32032 types\n",
      "2022-12-19 07:25:55 | INFO | fairseq.checkpoint_utils | Done loading state dict\n",
      "2022-12-19 07:25:55 | INFO | fairseq.models.fairseq_model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 10, 'log_format': None, 'log_file': None, 'tensorboard_logdir': None, 'wandb_project': 'mt-hallucinations', 'azureml_logging': False, 'seed': 42, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': False, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'pytorch_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 
'zero_sharding': 'none', 'fp16': False, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': False, 'max_tokens': 8192, 'batch_size': None, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': 8192, 'batch_size_valid': None, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 250000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': False, 'update_freq': [4], 'lr': [0.0005], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False}, 'checkpoint': {'_name': None, 'save_dir': '/home/nunomg/mt-hallucinations/HALO/fairseq/checkpoints/wmt18_de-en', 'restore_file': 'checkpoint_last.pt', 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 50000, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': 10, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': False, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'bleu', 'maximize_best_checkpoint_metric': True, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 
'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': Namespace(_name='transformer_wmt_en_de', activation_dropout=0.0, activation_fn='relu', adam_betas='(0.9, 0.98)', adam_eps=1e-08, adaptive_input=False, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, arch='transformer_wmt_en_de', attention_dropout=0.0, azureml_logging=False, batch_size=None, batch_size_valid=None, best_checkpoint_metric='bleu', bf16=False, bpe='sentencepiece', broadcast_buffers=False, bucket_cap_mb=25, checkpoint_activations=False, 
checkpoint_shard_count=1, checkpoint_suffix='', clip_norm=0.0, combine_valid_subsets=None, cpu=False, cpu_offload=False, criterion='label_smoothed_cross_entropy', cross_self_attention=False, curriculum=0, data='/private/home/daviddale/dev/nllb/demo/alti/de-en-hallucinations/model/wmt18_de-en', data_buffer_size=10, dataset_impl=None, ddp_backend='pytorch_ddp', ddp_comm_hook='none', decoder_attention_heads=8, decoder_embed_dim=512, decoder_embed_path=None, decoder_ffn_embed_dim=2048, decoder_input_dim=512, decoder_layerdrop=0, decoder_layers=6, decoder_layers_to_keep=None, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=512, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, dropout=0.3, ema_decay=0.9999, ema_fp32=False, ema_seed_model=None, ema_start_update=0, ema_update_freq=1, empty_cache_freq=0, encoder_attention_heads=8, encoder_embed_dim=512, encoder_embed_path=None, encoder_ffn_embed_dim=2048, encoder_layerdrop=0, encoder_layers=6, encoder_layers_to_keep=None, encoder_learned_pos=False, encoder_normalize_before=False, eos=2, eval_bleu=True, eval_bleu_args='{\"beam\": 5, \"max_len_a\": 1.2, \"max_len_b\": 10}', eval_bleu_detok='space', eval_bleu_detok_args='{}', eval_bleu_print_samples=True, eval_bleu_remove_bpe='sentencepiece', eval_tokenized_bleu=False, fast_stat_sync=False, find_unused_parameters=False, finetune_from_model=None, fix_batches_to_gpus=False, fixed_validation_seed=None, fp16=False, fp16_adam_stats=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, fp32_reduce_scatter=False, gen_subset='test', gradient_as_bucket_view=False, grouped_shuffling=False, heartbeat_timeout=-1, ignore_prefix_size=0, ignore_unused_valid_subsets=False, keep_best_checkpoints=-1, keep_interval_updates=-1, 
keep_interval_updates_pattern=-1, keep_last_epochs=10, label_smoothing=0.1, layernorm_embedding=False, left_pad_source=True, left_pad_target=False, load_alignments=False, load_checkpoint_on_all_dp_ranks=False, localsgd_frequency=3, log_file=None, log_format=None, log_interval=10, lr=[0.0005], lr_scheduler='inverse_sqrt', max_epoch=0, max_source_positions=1024, max_target_positions=1024, max_tokens=8192, max_tokens_valid=8192, max_update=250000, max_valid_steps=None, maximize_best_checkpoint_metric=True, memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, min_params_to_wrap=100000000, model_parallel_size=1, no_cross_attention=False, no_epoch_checkpoints=False, no_last_checkpoints=False, no_progress_bar=False, no_reshard_after_forward=False, no_save=False, no_save_optimizer_state=False, no_scale_embedding=False, no_seed_provided=False, no_token_positional_embeddings=False, not_fsdp_flatten_parameters=False, nprocs_per_node=1, num_batch_buckets=0, num_shards=1, num_workers=1, offload_activations=False, on_cpu_convert_precision=False, optimizer='adam', optimizer_overrides='{}', pad=1, patience=-1, pipeline_balance=None, pipeline_checkpoint='never', pipeline_chunks=0, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_devices=None, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_model_parallel=False, plasma_path='/tmp/plasma', profile=False, quant_noise_pq=0, quant_noise_pq_block_size=8, quant_noise_scalar=0, quantization_config_path=None, report_accuracy=False, required_batch_size_multiple=8, required_seq_len_multiple=1, reset_dataloader=False, reset_logging=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='/home/nunomg/mt-hallucinations/HALO/fairseq/checkpoints/wmt18_de-en', save_interval=1, save_interval_updates=50000, scoring='bleu', seed=42, sentence_avg=False, sentencepiece_model='../model/sentencepiece.joint.bpe.model', 
shard_id=0, share_all_embeddings=False, share_decoder_input_output_embed=True, simul_type=None, skip_invalid_size_inputs_valid_test=False, skip_remainder_batch=False, slowmo_base_algorithm='localsgd', slowmo_momentum=None, source_lang='de', stop_min_lr=-1.0, stop_time_hours=0, store_ema=False, suppress_crashes=False, target_lang='en', task='translation', tensorboard_logdir=None, threshold_loss_scale=None, tie_adaptive_weights=False, tokenizer=None, tpu=False, train_subset='train', truncate_source=False, unk=3, update_epoch_batch_itr=False, update_freq=[4], update_ordered_indices_seed=False, upsample_primary=-1, use_bmuf=False, use_old_adam=False, use_plasma_view=False, use_sharded_state=False, user_dir=None, valid_subset='valid', validate_after_updates=0, validate_interval=1, validate_interval_updates=0, wandb_project='mt-hallucinations', warmup_init_lr=-1, warmup_updates=4000, weight_decay=0.0001, write_checkpoints_asynchronously=False, zero_sharding='none'), 'task': {'_name': 'translation', 'data': '/private/home/daviddale/dev/nllb/demo/alti/de-en-hallucinations/model/wmt18_de-en', 'source_lang': 'de', 'target_lang': 'en', 'load_alignments': False, 'left_pad_source': True, 'left_pad_target': False, 'max_source_positions': 1024, 'max_target_positions': 1024, 'upsample_primary': -1, 'truncate_source': False, 'num_batch_buckets': 0, 'train_subset': 'train', 'dataset_impl': None, 'required_seq_len_multiple': 1, 'eval_bleu': True, 'eval_bleu_args': '{\"beam\": 5, \"max_len_a\": 1.2, \"max_len_b\": 10}', 'eval_bleu_detok': 'space', 'eval_bleu_detok_args': '{}', 'eval_tokenized_bleu': False, 'eval_bleu_remove_bpe': 'sentencepiece', 'eval_bleu_print_samples': True}, 'criterion': {'_name': 'label_smoothed_cross_entropy', 'label_smoothing': 0.1, 'report_accuracy': False, 'ignore_prefix_size': 0, 'sentence_avg': False}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9, 0.98)', 'adam_eps': 1e-08, 'weight_decay': 0.0001, 'use_old_adam': False, 'fp16_adam_stats': False, 
'tpu': False, 'lr': [0.0005]}, 'lr_scheduler': {'_name': 'inverse_sqrt', 'warmup_updates': 4000, 'warmup_init_lr': -1.0, 'lr': [0.0005]}, 'scoring': {'_name': 'bleu', 'pad': 1, 'eos': 2, 'unk': 3}, 'bpe': {'_name': 'sentencepiece', 'sentencepiece_model': '../model/sentencepiece.joint.bpe.model', 'sentencepiece_enable_sampling': False, 'sentencepiece_alpha': None}, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'simul_type': None}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "device(type='cpu')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the translation model checkpoint from MODEL_DIR, together with its data directory\n",
    "# and sentencepiece model; `hub` is the object later passed to the ALTI computation.\n",
    "hub = FairseqTransformerHub.from_pretrained(\n",
    "    MODEL_DIR,\n",
    "    checkpoint_file=\"checkpoint_best.pt\",\n",
    "    data_name_or_path=DATA_DIR,\n",
    "    bpe='sentencepiece', \n",
    "    sentencepiece_model=MODEL_DIR + '/sentencepiece.joint.bpe.model',\n",
    ")\n",
    "# Display the device the loaded model currently lives on.\n",
    "hub.device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "76b20072",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the model to the GPU only when CUDA is actually available, so that the\n",
    "# default USE_GPU = True does not make this cell fail on CPU-only machines.\n",
    "if USE_GPU:\n",
    "    if torch.cuda.is_available():\n",
    "        hub.cuda()\n",
    "    else:\n",
    "        print('USE_GPU is set, but CUDA is not available; keeping the model on CPU.')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e97df53",
   "metadata": {},
   "source": [
    "On GPU, the computation is fast: 3415 sentence pairs are processed in about 5 minutes. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3ba947f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7c8292a1cd4d45efb3362e307279047e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/3415 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Run ALTI on every (source, translation) pair of `gt`, one pair at a time.\n",
    "# `computed_alti[k]` holds the result for the k-th row of `gt`, in iteration order.\n",
    "computed_alti = []\n",
    "for i, row in tqdm(gt.iterrows(), total=gt.shape[0]):\n",
    "    # `row.src` is the source sentence and `row.mt` its machine translation\n",
    "    alti_t = compute_alti_nllb(hub, row.src, row.mt)\n",
    "    computed_alti.append(alti_t)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97d3097d",
   "metadata": {},
   "source": [
    "Look at an example of ALTI source-target contributions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "d6e3cd55",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_84f05_row0_col0, #T_84f05_row0_col1, #T_84f05_row0_col10, #T_84f05_row0_col11, #T_84f05_row1_col2, #T_84f05_row3_col3, #T_84f05_row4_col7, #T_84f05_row4_col8, #T_84f05_row4_col9, #T_84f05_row5_col4, #T_84f05_row6_col5, #T_84f05_row6_col6 {\n",
       "  background-color: #023858;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row0_col2 {\n",
       "  background-color: #045a8d;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row0_col3 {\n",
       "  background-color: #056ead;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row0_col4, #T_84f05_row4_col0, #T_84f05_row6_col8 {\n",
       "  background-color: #c2cbe2;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row0_col5, #T_84f05_row3_col4 {\n",
       "  background-color: #d0d1e6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row0_col6, #T_84f05_row2_col6, #T_84f05_row7_col4 {\n",
       "  background-color: #f4eef6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row0_col7 {\n",
       "  background-color: #04588a;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row0_col8 {\n",
       "  background-color: #7eadd1;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row0_col9, #T_84f05_row5_col7 {\n",
       "  background-color: #3790c0;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col0 {\n",
       "  background-color: #03476f;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col1 {\n",
       "  background-color: #2f8bbe;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col3 {\n",
       "  background-color: #c9cee4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row1_col4, #T_84f05_row7_col9 {\n",
       "  background-color: #fbf4f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row1_col5, #T_84f05_row1_col6, #T_84f05_row8_col0, #T_84f05_row8_col1, #T_84f05_row8_col2, #T_84f05_row8_col3, #T_84f05_row8_col4, #T_84f05_row8_col7, #T_84f05_row8_col8, #T_84f05_row8_col9, #T_84f05_row8_col10, #T_84f05_row8_col11 {\n",
       "  background-color: #fff7fb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row1_col7 {\n",
       "  background-color: #4697c4;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col8, #T_84f05_row4_col11, #T_84f05_row9_col5 {\n",
       "  background-color: #a5bddb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row1_col9 {\n",
       "  background-color: #73a9cf;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col10 {\n",
       "  background-color: #056ba9;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row1_col11 {\n",
       "  background-color: #045e93;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col0 {\n",
       "  background-color: #62a2cb;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col1, #T_84f05_row3_col11, #T_84f05_row9_col6 {\n",
       "  background-color: #84b0d3;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col2, #T_84f05_row10_col11 {\n",
       "  background-color: #2484ba;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col3 {\n",
       "  background-color: #02395a;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col4 {\n",
       "  background-color: #e4e1ef;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row2_col5, #T_84f05_row10_col3 {\n",
       "  background-color: #f1ebf4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row2_col7 {\n",
       "  background-color: #034f7d;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col8, #T_84f05_row2_col11 {\n",
       "  background-color: #308cbe;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col9 {\n",
       "  background-color: #0a73b2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row2_col10 {\n",
       "  background-color: #0567a2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row3_col0 {\n",
       "  background-color: #b8c6e0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col1 {\n",
       "  background-color: #b4c4df;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col2 {\n",
       "  background-color: #99b8d8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col5, #T_84f05_row9_col7 {\n",
       "  background-color: #f9f2f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col6, #T_84f05_row7_col8, #T_84f05_row9_col2, #T_84f05_row11_col4 {\n",
       "  background-color: #fdf5fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col7 {\n",
       "  background-color: #78abd0;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row3_col8 {\n",
       "  background-color: #c4cbe3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col9 {\n",
       "  background-color: #b1c2de;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row3_col10, #T_84f05_row4_col10 {\n",
       "  background-color: #4c99c5;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row4_col1 {\n",
       "  background-color: #cacee5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row4_col2 {\n",
       "  background-color: #b9c6e0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row4_col3, #T_84f05_row10_col5 {\n",
       "  background-color: #91b5d6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row4_col4, #T_84f05_row11_col2 {\n",
       "  background-color: #dad9ea;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row4_col5 {\n",
       "  background-color: #dcdaeb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row4_col6, #T_84f05_row11_col11 {\n",
       "  background-color: #dedcec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col0, #T_84f05_row5_col2, #T_84f05_row10_col1, #T_84f05_row10_col2 {\n",
       "  background-color: #e8e4f0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col1, #T_84f05_row11_col8 {\n",
       "  background-color: #ece7f2;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col3 {\n",
       "  background-color: #e7e3f0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col5 {\n",
       "  background-color: #9ab8d8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col6 {\n",
       "  background-color: #a4bcda;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row5_col8 {\n",
       "  background-color: #0569a5;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row5_col9 {\n",
       "  background-color: #2786bb;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row5_col10 {\n",
       "  background-color: #67a4cc;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row5_col11 {\n",
       "  background-color: #cdd0e5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col0, #T_84f05_row6_col3 {\n",
       "  background-color: #f4edf6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col1 {\n",
       "  background-color: #f5eef6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col2, #T_84f05_row9_col4 {\n",
       "  background-color: #f2ecf5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col4 {\n",
       "  background-color: #348ebf;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row6_col7 {\n",
       "  background-color: #bfc9e1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col9 {\n",
       "  background-color: #cccfe5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col10 {\n",
       "  background-color: #a8bedc;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row6_col11 {\n",
       "  background-color: #e0dded;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row7_col0, #T_84f05_row7_col1, #T_84f05_row7_col2, #T_84f05_row7_col11, #T_84f05_row9_col3 {\n",
       "  background-color: #fef6fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row7_col3, #T_84f05_row9_col0, #T_84f05_row9_col1 {\n",
       "  background-color: #fef6fb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row7_col5 {\n",
       "  background-color: #7dacd1;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row7_col6 {\n",
       "  background-color: #549cc7;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row7_col7, #T_84f05_row7_col10, #T_84f05_row9_col10 {\n",
       "  background-color: #f8f1f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row8_col5 {\n",
       "  background-color: #c8cde4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row8_col6 {\n",
       "  background-color: #b7c5df;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row9_col8 {\n",
       "  background-color: #faf2f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row9_col9, #T_84f05_row11_col6 {\n",
       "  background-color: #faf3f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row9_col11 {\n",
       "  background-color: #fcf4fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col0 {\n",
       "  background-color: #e9e5f1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col4 {\n",
       "  background-color: #d7d6e9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col6 {\n",
       "  background-color: #8fb4d6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col7 {\n",
       "  background-color: #bcc7e1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col8 {\n",
       "  background-color: #c6cce3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col9 {\n",
       "  background-color: #c5cce3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row10_col10 {\n",
       "  background-color: #2d8abd;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_84f05_row11_col0, #T_84f05_row11_col10 {\n",
       "  background-color: #dfddec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row11_col1 {\n",
       "  background-color: #e3e0ee;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row11_col3 {\n",
       "  background-color: #efe9f3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row11_col5 {\n",
       "  background-color: #fbf3f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row11_col7 {\n",
       "  background-color: #dddbec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_84f05_row11_col9 {\n",
       "  background-color: #e2dfee;\n",
       "  color: #000000;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_84f05\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_84f05_level0_col0\" class=\"col_heading level0 col0\" >▁Lassen</th>\n",
       "      <th id=\"T_84f05_level0_col1\" class=\"col_heading level0 col1\" >▁Sie</th>\n",
       "      <th id=\"T_84f05_level0_col2\" class=\"col_heading level0 col2\" >▁mich</th>\n",
       "      <th id=\"T_84f05_level0_col3\" class=\"col_heading level0 col3\" >▁zunächst</th>\n",
       "      <th id=\"T_84f05_level0_col4\" class=\"col_heading level0 col4\" >▁meine</th>\n",
       "      <th id=\"T_84f05_level0_col5\" class=\"col_heading level0 col5\" >▁Ver</th>\n",
       "      <th id=\"T_84f05_level0_col6\" class=\"col_heading level0 col6\" >legenheit</th>\n",
       "      <th id=\"T_84f05_level0_col7\" class=\"col_heading level0 col7\" >▁zum</th>\n",
       "      <th id=\"T_84f05_level0_col8\" class=\"col_heading level0 col8\" >▁Ausdruck</th>\n",
       "      <th id=\"T_84f05_level0_col9\" class=\"col_heading level0 col9\" >▁bringen</th>\n",
       "      <th id=\"T_84f05_level0_col10\" class=\"col_heading level0 col10\" >.</th>\n",
       "      <th id=\"T_84f05_level0_col11\" class=\"col_heading level0 col11\" ></s></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row0\" class=\"row_heading level0 row0\" >▁Let</th>\n",
       "      <td id=\"T_84f05_row0_col0\" class=\"data row0 col0\" >0.187230</td>\n",
       "      <td id=\"T_84f05_row0_col1\" class=\"data row0 col1\" >0.054757</td>\n",
       "      <td id=\"T_84f05_row0_col2\" class=\"data row0 col2\" >0.152082</td>\n",
       "      <td id=\"T_84f05_row0_col3\" class=\"data row0 col3\" >0.179530</td>\n",
       "      <td id=\"T_84f05_row0_col4\" class=\"data row0 col4\" >0.065776</td>\n",
       "      <td id=\"T_84f05_row0_col5\" class=\"data row0 col5\" >0.033765</td>\n",
       "      <td id=\"T_84f05_row0_col6\" class=\"data row0 col6\" >0.050423</td>\n",
       "      <td id=\"T_84f05_row0_col7\" class=\"data row0 col7\" >0.012787</td>\n",
       "      <td id=\"T_84f05_row0_col8\" class=\"data row0 col8\" >0.059994</td>\n",
       "      <td id=\"T_84f05_row0_col9\" class=\"data row0 col9\" >0.035203</td>\n",
       "      <td id=\"T_84f05_row0_col10\" class=\"data row0 col10\" >0.011735</td>\n",
       "      <td id=\"T_84f05_row0_col11\" class=\"data row0 col11\" >0.003673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row1\" class=\"row_heading level0 row1\" >▁me</th>\n",
       "      <td id=\"T_84f05_row1_col0\" class=\"data row1 col0\" >0.177234</td>\n",
       "      <td id=\"T_84f05_row1_col1\" class=\"data row1 col1\" >0.036178</td>\n",
       "      <td id=\"T_84f05_row1_col2\" class=\"data row1 col2\" >0.172876</td>\n",
       "      <td id=\"T_84f05_row1_col3\" class=\"data row1 col3\" >0.068960</td>\n",
       "      <td id=\"T_84f05_row1_col4\" class=\"data row1 col4\" >0.029418</td>\n",
       "      <td id=\"T_84f05_row1_col5\" class=\"data row1 col5\" >0.008563</td>\n",
       "      <td id=\"T_84f05_row1_col6\" class=\"data row1 col6\" >0.023114</td>\n",
       "      <td id=\"T_84f05_row1_col7\" class=\"data row1 col7\" >0.009291</td>\n",
       "      <td id=\"T_84f05_row1_col8\" class=\"data row1 col8\" >0.050244</td>\n",
       "      <td id=\"T_84f05_row1_col9\" class=\"data row1 col9\" >0.029518</td>\n",
       "      <td id=\"T_84f05_row1_col10\" class=\"data row1 col10\" >0.009452</td>\n",
       "      <td id=\"T_84f05_row1_col11\" class=\"data row1 col11\" >0.003174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row2\" class=\"row_heading level0 row2\" >▁begin</th>\n",
       "      <td id=\"T_84f05_row2_col0\" class=\"data row2 col0\" >0.105086</td>\n",
       "      <td id=\"T_84f05_row2_col1\" class=\"data row2 col1\" >0.026848</td>\n",
       "      <td id=\"T_84f05_row2_col2\" class=\"data row2 col2\" >0.118675</td>\n",
       "      <td id=\"T_84f05_row2_col3\" class=\"data row2 col3\" >0.232698</td>\n",
       "      <td id=\"T_84f05_row2_col4\" class=\"data row2 col4\" >0.048564</td>\n",
       "      <td id=\"T_84f05_row2_col5\" class=\"data row2 col5\" >0.018145</td>\n",
       "      <td id=\"T_84f05_row2_col6\" class=\"data row2 col6\" >0.051057</td>\n",
       "      <td id=\"T_84f05_row2_col7\" class=\"data row2 col7\" >0.013129</td>\n",
       "      <td id=\"T_84f05_row2_col8\" class=\"data row2 col8\" >0.076830</td>\n",
       "      <td id=\"T_84f05_row2_col9\" class=\"data row2 col9\" >0.040446</td>\n",
       "      <td id=\"T_84f05_row2_col10\" class=\"data row2 col10\" >0.009672</td>\n",
       "      <td id=\"T_84f05_row2_col11\" class=\"data row2 col11\" >0.002440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row3\" class=\"row_heading level0 row3\" >▁by</th>\n",
       "      <td id=\"T_84f05_row3_col0\" class=\"data row3 col0\" >0.067121</td>\n",
       "      <td id=\"T_84f05_row3_col1\" class=\"data row3 col1\" >0.020307</td>\n",
       "      <td id=\"T_84f05_row3_col2\" class=\"data row3 col2\" >0.075587</td>\n",
       "      <td id=\"T_84f05_row3_col3\" class=\"data row3 col3\" >0.233973</td>\n",
       "      <td id=\"T_84f05_row3_col4\" class=\"data row3 col4\" >0.060660</td>\n",
       "      <td id=\"T_84f05_row3_col5\" class=\"data row3 col5\" >0.012521</td>\n",
       "      <td id=\"T_84f05_row3_col6\" class=\"data row3 col6\" >0.029689</td>\n",
       "      <td id=\"T_84f05_row3_col7\" class=\"data row3 col7\" >0.008087</td>\n",
       "      <td id=\"T_84f05_row3_col8\" class=\"data row3 col8\" >0.040760</td>\n",
       "      <td id=\"T_84f05_row3_col9\" class=\"data row3 col9\" >0.022088</td>\n",
       "      <td id=\"T_84f05_row3_col10\" class=\"data row3 col10\" >0.007429</td>\n",
       "      <td id=\"T_84f05_row3_col11\" class=\"data row3 col11\" >0.001830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row4\" class=\"row_heading level0 row4\" >▁expressing</th>\n",
       "      <td id=\"T_84f05_row4_col0\" class=\"data row4 col0\" >0.061585</td>\n",
       "      <td id=\"T_84f05_row4_col1\" class=\"data row4 col1\" >0.016918</td>\n",
       "      <td id=\"T_84f05_row4_col2\" class=\"data row4 col2\" >0.060454</td>\n",
       "      <td id=\"T_84f05_row4_col3\" class=\"data row4 col3\" >0.104339</td>\n",
       "      <td id=\"T_84f05_row4_col4\" class=\"data row4 col4\" >0.054063</td>\n",
       "      <td id=\"T_84f05_row4_col5\" class=\"data row4 col5\" >0.028572</td>\n",
       "      <td id=\"T_84f05_row4_col6\" class=\"data row4 col6\" >0.095226</td>\n",
       "      <td id=\"T_84f05_row4_col7\" class=\"data row4 col7\" >0.014208</td>\n",
       "      <td id=\"T_84f05_row4_col8\" class=\"data row4 col8\" >0.113486</td>\n",
       "      <td id=\"T_84f05_row4_col9\" class=\"data row4 col9\" >0.052795</td>\n",
       "      <td id=\"T_84f05_row4_col10\" class=\"data row4 col10\" >0.007430</td>\n",
       "      <td id=\"T_84f05_row4_col11\" class=\"data row4 col11\" >0.001544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row5\" class=\"row_heading level0 row5\" >▁my</th>\n",
       "      <td id=\"T_84f05_row5_col0\" class=\"data row5 col0\" >0.035415</td>\n",
       "      <td id=\"T_84f05_row5_col1\" class=\"data row5 col1\" >0.009602</td>\n",
       "      <td id=\"T_84f05_row5_col2\" class=\"data row5 col2\" >0.031961</td>\n",
       "      <td id=\"T_84f05_row5_col3\" class=\"data row5 col3\" >0.041206</td>\n",
       "      <td id=\"T_84f05_row5_col4\" class=\"data row5 col4\" >0.162535</td>\n",
       "      <td id=\"T_84f05_row5_col5\" class=\"data row5 col5\" >0.049221</td>\n",
       "      <td id=\"T_84f05_row5_col6\" class=\"data row5 col6\" >0.167154</td>\n",
       "      <td id=\"T_84f05_row5_col7\" class=\"data row5 col7\" >0.009681</td>\n",
       "      <td id=\"T_84f05_row5_col8\" class=\"data row5 col8\" >0.091974</td>\n",
       "      <td id=\"T_84f05_row5_col9\" class=\"data row5 col9\" >0.037068</td>\n",
       "      <td id=\"T_84f05_row5_col10\" class=\"data row5 col10\" >0.006908</td>\n",
       "      <td id=\"T_84f05_row5_col11\" class=\"data row5 col11\" >0.001144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row6\" class=\"row_heading level0 row6\" >▁embar</th>\n",
       "      <td id=\"T_84f05_row6_col0\" class=\"data row6 col0\" >0.023746</td>\n",
       "      <td id=\"T_84f05_row6_col1\" class=\"data row6 col1\" >0.006623</td>\n",
       "      <td id=\"T_84f05_row6_col2\" class=\"data row6 col2\" >0.022936</td>\n",
       "      <td id=\"T_84f05_row6_col3\" class=\"data row6 col3\" >0.024855</td>\n",
       "      <td id=\"T_84f05_row6_col4\" class=\"data row6 col4\" >0.112323</td>\n",
       "      <td id=\"T_84f05_row6_col5\" class=\"data row6 col5\" >0.109145</td>\n",
       "      <td id=\"T_84f05_row6_col6\" class=\"data row6 col6\" >0.401771</td>\n",
       "      <td id=\"T_84f05_row6_col7\" class=\"data row6 col7\" >0.005850</td>\n",
       "      <td id=\"T_84f05_row6_col8\" class=\"data row6 col8\" >0.041487</td>\n",
       "      <td id=\"T_84f05_row6_col9\" class=\"data row6 col9\" >0.018518</td>\n",
       "      <td id=\"T_84f05_row6_col10\" class=\"data row6 col10\" >0.005279</td>\n",
       "      <td id=\"T_84f05_row6_col11\" class=\"data row6 col11\" >0.000874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row7\" class=\"row_heading level0 row7\" >r</th>\n",
       "      <td id=\"T_84f05_row7_col0\" class=\"data row7 col0\" >0.011822</td>\n",
       "      <td id=\"T_84f05_row7_col1\" class=\"data row7 col1\" >0.003594</td>\n",
       "      <td id=\"T_84f05_row7_col2\" class=\"data row7 col2\" >0.009987</td>\n",
       "      <td id=\"T_84f05_row7_col3\" class=\"data row7 col3\" >0.009004</td>\n",
       "      <td id=\"T_84f05_row7_col4\" class=\"data row7 col4\" >0.036264</td>\n",
       "      <td id=\"T_84f05_row7_col5\" class=\"data row7 col5\" >0.056879</td>\n",
       "      <td id=\"T_84f05_row7_col6\" class=\"data row7 col6\" >0.237397</td>\n",
       "      <td id=\"T_84f05_row7_col7\" class=\"data row7 col7\" >0.002786</td>\n",
       "      <td id=\"T_84f05_row7_col8\" class=\"data row7 col8\" >0.013612</td>\n",
       "      <td id=\"T_84f05_row7_col9\" class=\"data row7 col9\" >0.007376</td>\n",
       "      <td id=\"T_84f05_row7_col10\" class=\"data row7 col10\" >0.001992</td>\n",
       "      <td id=\"T_84f05_row7_col11\" class=\"data row7 col11\" >0.000290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row8\" class=\"row_heading level0 row8\" >ass</th>\n",
       "      <td id=\"T_84f05_row8_col0\" class=\"data row8 col0\" >0.010152</td>\n",
       "      <td id=\"T_84f05_row8_col1\" class=\"data row8 col1\" >0.003053</td>\n",
       "      <td id=\"T_84f05_row8_col2\" class=\"data row8 col2\" >0.008315</td>\n",
       "      <td id=\"T_84f05_row8_col3\" class=\"data row8 col3\" >0.007592</td>\n",
       "      <td id=\"T_84f05_row8_col4\" class=\"data row8 col4\" >0.026204</td>\n",
       "      <td id=\"T_84f05_row8_col5\" class=\"data row8 col5\" >0.036452</td>\n",
       "      <td id=\"T_84f05_row8_col6\" class=\"data row8 col6\" >0.145966</td>\n",
       "      <td id=\"T_84f05_row8_col7\" class=\"data row8 col7\" >0.002191</td>\n",
       "      <td id=\"T_84f05_row8_col8\" class=\"data row8 col8\" >0.011672</td>\n",
       "      <td id=\"T_84f05_row8_col9\" class=\"data row8 col9\" >0.006172</td>\n",
       "      <td id=\"T_84f05_row8_col10\" class=\"data row8 col10\" >0.001527</td>\n",
       "      <td id=\"T_84f05_row8_col11\" class=\"data row8 col11\" >0.000253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row9\" class=\"row_heading level0 row9\" >ment</th>\n",
       "      <td id=\"T_84f05_row9_col0\" class=\"data row9 col0\" >0.011504</td>\n",
       "      <td id=\"T_84f05_row9_col1\" class=\"data row9 col1\" >0.003347</td>\n",
       "      <td id=\"T_84f05_row9_col2\" class=\"data row9 col2\" >0.010321</td>\n",
       "      <td id=\"T_84f05_row9_col3\" class=\"data row9 col3\" >0.009686</td>\n",
       "      <td id=\"T_84f05_row9_col4\" class=\"data row9 col4\" >0.038061</td>\n",
       "      <td id=\"T_84f05_row9_col5\" class=\"data row9 col5\" >0.046663</td>\n",
       "      <td id=\"T_84f05_row9_col6\" class=\"data row9 col6\" >0.196906</td>\n",
       "      <td id=\"T_84f05_row9_col7\" class=\"data row9 col7\" >0.002677</td>\n",
       "      <td id=\"T_84f05_row9_col8\" class=\"data row9 col8\" >0.015470</td>\n",
       "      <td id=\"T_84f05_row9_col9\" class=\"data row9 col9\" >0.007741</td>\n",
       "      <td id=\"T_84f05_row9_col10\" class=\"data row9 col10\" >0.002025</td>\n",
       "      <td id=\"T_84f05_row9_col11\" class=\"data row9 col11\" >0.000322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row10\" class=\"row_heading level0 row10\" >.</th>\n",
       "      <td id=\"T_84f05_row10_col0\" class=\"data row10 col0\" >0.034624</td>\n",
       "      <td id=\"T_84f05_row10_col1\" class=\"data row10 col1\" >0.010515</td>\n",
       "      <td id=\"T_84f05_row10_col2\" class=\"data row10 col2\" >0.031751</td>\n",
       "      <td id=\"T_84f05_row10_col3\" class=\"data row10 col3\" >0.029445</td>\n",
       "      <td id=\"T_84f05_row10_col4\" class=\"data row10 col4\" >0.056192</td>\n",
       "      <td id=\"T_84f05_row10_col5\" class=\"data row10 col5\" >0.051430</td>\n",
       "      <td id=\"T_84f05_row10_col6\" class=\"data row10 col6\" >0.186680</td>\n",
       "      <td id=\"T_84f05_row10_col7\" class=\"data row10 col7\" >0.005912</td>\n",
       "      <td id=\"T_84f05_row10_col8\" class=\"data row10 col8\" >0.039981</td>\n",
       "      <td id=\"T_84f05_row10_col9\" class=\"data row10 col9\" >0.019408</td>\n",
       "      <td id=\"T_84f05_row10_col10\" class=\"data row10 col10\" >0.008137</td>\n",
       "      <td id=\"T_84f05_row10_col11\" class=\"data row10 col11\" >0.002545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_84f05_level0_row11\" class=\"row_heading level0 row11\" ></s></th>\n",
       "      <td id=\"T_84f05_row11_col0\" class=\"data row11 col0\" >0.042708</td>\n",
       "      <td id=\"T_84f05_row11_col1\" class=\"data row11 col1\" >0.011732</td>\n",
       "      <td id=\"T_84f05_row11_col2\" class=\"data row11 col2\" >0.042177</td>\n",
       "      <td id=\"T_84f05_row11_col3\" class=\"data row11 col3\" >0.031944</td>\n",
       "      <td id=\"T_84f05_row11_col4\" class=\"data row11 col4\" >0.028125</td>\n",
       "      <td id=\"T_84f05_row11_col5\" class=\"data row11 col5\" >0.011670</td>\n",
       "      <td id=\"T_84f05_row11_col6\" class=\"data row11 col6\" >0.034999</td>\n",
       "      <td id=\"T_84f05_row11_col7\" class=\"data row11 col7\" >0.004519</td>\n",
       "      <td id=\"T_84f05_row11_col8\" class=\"data row11 col8\" >0.024745</td>\n",
       "      <td id=\"T_84f05_row11_col9\" class=\"data row11 col9\" >0.014020</td>\n",
       "      <td id=\"T_84f05_row11_col10\" class=\"data row11 col10\" >0.003402</td>\n",
       "      <td id=\"T_84f05_row11_col11\" class=\"data row11 col11\" >0.000897</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f8143b32c10>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Unpack the stored ALTI result at index 2 of `computed_alti`.\n",
    "alti_matrix, source_sentence, target_sentence, predicted_sentence = computed_alti[2]\n",
    "\n",
    "# Rows are labelled with the predicted tokens, columns with the source tokens.\n",
    "# Columns past len(source_sentence) are dropped\n",
    "# (presumably the target-prefix contributions - TODO confirm).\n",
    "pd.DataFrame(\n",
    "    data=alti_matrix[:, :len(source_sentence)],\n",
    "    index=predicted_sentence,\n",
    "    columns=source_sentence,\n",
    ").style.background_gradient()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "13e793f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_d9e09_row0_col0, #T_d9e09_row0_col1, #T_d9e09_row0_col10, #T_d9e09_row0_col11, #T_d9e09_row1_col2, #T_d9e09_row3_col3, #T_d9e09_row4_col7, #T_d9e09_row4_col8, #T_d9e09_row4_col9, #T_d9e09_row5_col4, #T_d9e09_row6_col5, #T_d9e09_row6_col6 {\n",
       "  background-color: #023858;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row0_col2 {\n",
       "  background-color: #045a8d;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row0_col3 {\n",
       "  background-color: #056ead;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row0_col4, #T_d9e09_row4_col0, #T_d9e09_row6_col8 {\n",
       "  background-color: #c2cbe2;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row0_col5, #T_d9e09_row3_col4 {\n",
       "  background-color: #d0d1e6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row0_col6, #T_d9e09_row2_col6, #T_d9e09_row7_col4 {\n",
       "  background-color: #f4eef6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row0_col7 {\n",
       "  background-color: #04588a;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row0_col8 {\n",
       "  background-color: #7eadd1;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row0_col9, #T_d9e09_row5_col7 {\n",
       "  background-color: #3790c0;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col0 {\n",
       "  background-color: #03476f;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col1 {\n",
       "  background-color: #2f8bbe;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col3 {\n",
       "  background-color: #c9cee4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row1_col4, #T_d9e09_row7_col9 {\n",
       "  background-color: #fbf4f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row1_col5, #T_d9e09_row1_col6, #T_d9e09_row8_col0, #T_d9e09_row8_col1, #T_d9e09_row8_col2, #T_d9e09_row8_col3, #T_d9e09_row8_col4, #T_d9e09_row8_col7, #T_d9e09_row8_col8, #T_d9e09_row8_col9, #T_d9e09_row8_col10, #T_d9e09_row8_col11 {\n",
       "  background-color: #fff7fb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row1_col7 {\n",
       "  background-color: #4697c4;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col8, #T_d9e09_row4_col11, #T_d9e09_row9_col5 {\n",
       "  background-color: #a5bddb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row1_col9 {\n",
       "  background-color: #73a9cf;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col10 {\n",
       "  background-color: #056ba9;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row1_col11 {\n",
       "  background-color: #045e93;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col0 {\n",
       "  background-color: #62a2cb;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col1, #T_d9e09_row3_col11, #T_d9e09_row9_col6 {\n",
       "  background-color: #84b0d3;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col2, #T_d9e09_row10_col11 {\n",
       "  background-color: #2484ba;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col3 {\n",
       "  background-color: #02395a;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col4 {\n",
       "  background-color: #e4e1ef;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row2_col5, #T_d9e09_row10_col3 {\n",
       "  background-color: #f1ebf4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row2_col7 {\n",
       "  background-color: #034f7d;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col8, #T_d9e09_row2_col11 {\n",
       "  background-color: #308cbe;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col9 {\n",
       "  background-color: #0a73b2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row2_col10 {\n",
       "  background-color: #0567a2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row3_col0 {\n",
       "  background-color: #b8c6e0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col1 {\n",
       "  background-color: #b4c4df;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col2 {\n",
       "  background-color: #99b8d8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col5, #T_d9e09_row9_col7 {\n",
       "  background-color: #f9f2f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col6, #T_d9e09_row7_col8, #T_d9e09_row9_col2, #T_d9e09_row11_col4 {\n",
       "  background-color: #fdf5fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col7 {\n",
       "  background-color: #78abd0;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row3_col8 {\n",
       "  background-color: #c4cbe3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col9 {\n",
       "  background-color: #b1c2de;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row3_col10, #T_d9e09_row4_col10 {\n",
       "  background-color: #4c99c5;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row4_col1 {\n",
       "  background-color: #cacee5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row4_col2 {\n",
       "  background-color: #b9c6e0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row4_col3, #T_d9e09_row10_col5 {\n",
       "  background-color: #91b5d6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row4_col4, #T_d9e09_row11_col2 {\n",
       "  background-color: #dad9ea;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row4_col5 {\n",
       "  background-color: #dcdaeb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row4_col6, #T_d9e09_row11_col11 {\n",
       "  background-color: #dedcec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col0, #T_d9e09_row5_col2, #T_d9e09_row10_col1, #T_d9e09_row10_col2 {\n",
       "  background-color: #e8e4f0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col1, #T_d9e09_row11_col8 {\n",
       "  background-color: #ece7f2;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col3 {\n",
       "  background-color: #e7e3f0;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col5 {\n",
       "  background-color: #9ab8d8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col6 {\n",
       "  background-color: #a4bcda;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row5_col8 {\n",
       "  background-color: #0569a5;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row5_col9 {\n",
       "  background-color: #2786bb;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row5_col10 {\n",
       "  background-color: #67a4cc;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row5_col11 {\n",
       "  background-color: #cdd0e5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col0, #T_d9e09_row6_col3 {\n",
       "  background-color: #f4edf6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col1 {\n",
       "  background-color: #f5eef6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col2, #T_d9e09_row9_col4 {\n",
       "  background-color: #f2ecf5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col4 {\n",
       "  background-color: #348ebf;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row6_col7 {\n",
       "  background-color: #bfc9e1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col9 {\n",
       "  background-color: #cccfe5;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col10 {\n",
       "  background-color: #a8bedc;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row6_col11 {\n",
       "  background-color: #e0dded;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row7_col0, #T_d9e09_row7_col1, #T_d9e09_row7_col2, #T_d9e09_row7_col11, #T_d9e09_row9_col3 {\n",
       "  background-color: #fef6fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row7_col3, #T_d9e09_row9_col0, #T_d9e09_row9_col1 {\n",
       "  background-color: #fef6fb;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row7_col5 {\n",
       "  background-color: #7dacd1;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row7_col6 {\n",
       "  background-color: #549cc7;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row7_col7, #T_d9e09_row7_col10, #T_d9e09_row9_col10 {\n",
       "  background-color: #f8f1f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row8_col5 {\n",
       "  background-color: #c8cde4;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row8_col6 {\n",
       "  background-color: #b7c5df;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row9_col8 {\n",
       "  background-color: #faf2f8;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row9_col9, #T_d9e09_row11_col6 {\n",
       "  background-color: #faf3f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row9_col11 {\n",
       "  background-color: #fcf4fa;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col0 {\n",
       "  background-color: #e9e5f1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col4 {\n",
       "  background-color: #d7d6e9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col6 {\n",
       "  background-color: #8fb4d6;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col7 {\n",
       "  background-color: #bcc7e1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col8 {\n",
       "  background-color: #c6cce3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col9 {\n",
       "  background-color: #c5cce3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row10_col10 {\n",
       "  background-color: #2d8abd;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_d9e09_row11_col0, #T_d9e09_row11_col10 {\n",
       "  background-color: #dfddec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row11_col1 {\n",
       "  background-color: #e3e0ee;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row11_col3 {\n",
       "  background-color: #efe9f3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row11_col5 {\n",
       "  background-color: #fbf3f9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row11_col7 {\n",
       "  background-color: #dddbec;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_d9e09_row11_col9 {\n",
       "  background-color: #e2dfee;\n",
       "  color: #000000;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_d9e09\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_d9e09_level0_col0\" class=\"col_heading level0 col0\" >▁Lassen</th>\n",
       "      <th id=\"T_d9e09_level0_col1\" class=\"col_heading level0 col1\" >▁Sie</th>\n",
       "      <th id=\"T_d9e09_level0_col2\" class=\"col_heading level0 col2\" >▁mich</th>\n",
       "      <th id=\"T_d9e09_level0_col3\" class=\"col_heading level0 col3\" >▁zunächst</th>\n",
       "      <th id=\"T_d9e09_level0_col4\" class=\"col_heading level0 col4\" >▁meine</th>\n",
       "      <th id=\"T_d9e09_level0_col5\" class=\"col_heading level0 col5\" >▁Ver</th>\n",
       "      <th id=\"T_d9e09_level0_col6\" class=\"col_heading level0 col6\" >legenheit</th>\n",
       "      <th id=\"T_d9e09_level0_col7\" class=\"col_heading level0 col7\" >▁zum</th>\n",
       "      <th id=\"T_d9e09_level0_col8\" class=\"col_heading level0 col8\" >▁Ausdruck</th>\n",
       "      <th id=\"T_d9e09_level0_col9\" class=\"col_heading level0 col9\" >▁bringen</th>\n",
       "      <th id=\"T_d9e09_level0_col10\" class=\"col_heading level0 col10\" >.</th>\n",
       "      <th id=\"T_d9e09_level0_col11\" class=\"col_heading level0 col11\" ></s></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row0\" class=\"row_heading level0 row0\" >▁Let</th>\n",
       "      <td id=\"T_d9e09_row0_col0\" class=\"data row0 col0\" >0.187230</td>\n",
       "      <td id=\"T_d9e09_row0_col1\" class=\"data row0 col1\" >0.054757</td>\n",
       "      <td id=\"T_d9e09_row0_col2\" class=\"data row0 col2\" >0.152082</td>\n",
       "      <td id=\"T_d9e09_row0_col3\" class=\"data row0 col3\" >0.179530</td>\n",
       "      <td id=\"T_d9e09_row0_col4\" class=\"data row0 col4\" >0.065776</td>\n",
       "      <td id=\"T_d9e09_row0_col5\" class=\"data row0 col5\" >0.033765</td>\n",
       "      <td id=\"T_d9e09_row0_col6\" class=\"data row0 col6\" >0.050423</td>\n",
       "      <td id=\"T_d9e09_row0_col7\" class=\"data row0 col7\" >0.012787</td>\n",
       "      <td id=\"T_d9e09_row0_col8\" class=\"data row0 col8\" >0.059994</td>\n",
       "      <td id=\"T_d9e09_row0_col9\" class=\"data row0 col9\" >0.035203</td>\n",
       "      <td id=\"T_d9e09_row0_col10\" class=\"data row0 col10\" >0.011735</td>\n",
       "      <td id=\"T_d9e09_row0_col11\" class=\"data row0 col11\" >0.003673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row1\" class=\"row_heading level0 row1\" >▁me</th>\n",
       "      <td id=\"T_d9e09_row1_col0\" class=\"data row1 col0\" >0.177234</td>\n",
       "      <td id=\"T_d9e09_row1_col1\" class=\"data row1 col1\" >0.036178</td>\n",
       "      <td id=\"T_d9e09_row1_col2\" class=\"data row1 col2\" >0.172876</td>\n",
       "      <td id=\"T_d9e09_row1_col3\" class=\"data row1 col3\" >0.068960</td>\n",
       "      <td id=\"T_d9e09_row1_col4\" class=\"data row1 col4\" >0.029418</td>\n",
       "      <td id=\"T_d9e09_row1_col5\" class=\"data row1 col5\" >0.008563</td>\n",
       "      <td id=\"T_d9e09_row1_col6\" class=\"data row1 col6\" >0.023113</td>\n",
       "      <td id=\"T_d9e09_row1_col7\" class=\"data row1 col7\" >0.009291</td>\n",
       "      <td id=\"T_d9e09_row1_col8\" class=\"data row1 col8\" >0.050243</td>\n",
       "      <td id=\"T_d9e09_row1_col9\" class=\"data row1 col9\" >0.029518</td>\n",
       "      <td id=\"T_d9e09_row1_col10\" class=\"data row1 col10\" >0.009452</td>\n",
       "      <td id=\"T_d9e09_row1_col11\" class=\"data row1 col11\" >0.003174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row2\" class=\"row_heading level0 row2\" >▁begin</th>\n",
       "      <td id=\"T_d9e09_row2_col0\" class=\"data row2 col0\" >0.105086</td>\n",
       "      <td id=\"T_d9e09_row2_col1\" class=\"data row2 col1\" >0.026849</td>\n",
       "      <td id=\"T_d9e09_row2_col2\" class=\"data row2 col2\" >0.118675</td>\n",
       "      <td id=\"T_d9e09_row2_col3\" class=\"data row2 col3\" >0.232698</td>\n",
       "      <td id=\"T_d9e09_row2_col4\" class=\"data row2 col4\" >0.048564</td>\n",
       "      <td id=\"T_d9e09_row2_col5\" class=\"data row2 col5\" >0.018145</td>\n",
       "      <td id=\"T_d9e09_row2_col6\" class=\"data row2 col6\" >0.051057</td>\n",
       "      <td id=\"T_d9e09_row2_col7\" class=\"data row2 col7\" >0.013128</td>\n",
       "      <td id=\"T_d9e09_row2_col8\" class=\"data row2 col8\" >0.076830</td>\n",
       "      <td id=\"T_d9e09_row2_col9\" class=\"data row2 col9\" >0.040446</td>\n",
       "      <td id=\"T_d9e09_row2_col10\" class=\"data row2 col10\" >0.009672</td>\n",
       "      <td id=\"T_d9e09_row2_col11\" class=\"data row2 col11\" >0.002440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row3\" class=\"row_heading level0 row3\" >▁by</th>\n",
       "      <td id=\"T_d9e09_row3_col0\" class=\"data row3 col0\" >0.067120</td>\n",
       "      <td id=\"T_d9e09_row3_col1\" class=\"data row3 col1\" >0.020307</td>\n",
       "      <td id=\"T_d9e09_row3_col2\" class=\"data row3 col2\" >0.075587</td>\n",
       "      <td id=\"T_d9e09_row3_col3\" class=\"data row3 col3\" >0.233973</td>\n",
       "      <td id=\"T_d9e09_row3_col4\" class=\"data row3 col4\" >0.060660</td>\n",
       "      <td id=\"T_d9e09_row3_col5\" class=\"data row3 col5\" >0.012521</td>\n",
       "      <td id=\"T_d9e09_row3_col6\" class=\"data row3 col6\" >0.029689</td>\n",
       "      <td id=\"T_d9e09_row3_col7\" class=\"data row3 col7\" >0.008087</td>\n",
       "      <td id=\"T_d9e09_row3_col8\" class=\"data row3 col8\" >0.040760</td>\n",
       "      <td id=\"T_d9e09_row3_col9\" class=\"data row3 col9\" >0.022088</td>\n",
       "      <td id=\"T_d9e09_row3_col10\" class=\"data row3 col10\" >0.007429</td>\n",
       "      <td id=\"T_d9e09_row3_col11\" class=\"data row3 col11\" >0.001829</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row4\" class=\"row_heading level0 row4\" >▁expressing</th>\n",
       "      <td id=\"T_d9e09_row4_col0\" class=\"data row4 col0\" >0.061585</td>\n",
       "      <td id=\"T_d9e09_row4_col1\" class=\"data row4 col1\" >0.016918</td>\n",
       "      <td id=\"T_d9e09_row4_col2\" class=\"data row4 col2\" >0.060454</td>\n",
       "      <td id=\"T_d9e09_row4_col3\" class=\"data row4 col3\" >0.104339</td>\n",
       "      <td id=\"T_d9e09_row4_col4\" class=\"data row4 col4\" >0.054063</td>\n",
       "      <td id=\"T_d9e09_row4_col5\" class=\"data row4 col5\" >0.028572</td>\n",
       "      <td id=\"T_d9e09_row4_col6\" class=\"data row4 col6\" >0.095226</td>\n",
       "      <td id=\"T_d9e09_row4_col7\" class=\"data row4 col7\" >0.014207</td>\n",
       "      <td id=\"T_d9e09_row4_col8\" class=\"data row4 col8\" >0.113486</td>\n",
       "      <td id=\"T_d9e09_row4_col9\" class=\"data row4 col9\" >0.052795</td>\n",
       "      <td id=\"T_d9e09_row4_col10\" class=\"data row4 col10\" >0.007430</td>\n",
       "      <td id=\"T_d9e09_row4_col11\" class=\"data row4 col11\" >0.001544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row5\" class=\"row_heading level0 row5\" >▁my</th>\n",
       "      <td id=\"T_d9e09_row5_col0\" class=\"data row5 col0\" >0.035415</td>\n",
       "      <td id=\"T_d9e09_row5_col1\" class=\"data row5 col1\" >0.009602</td>\n",
       "      <td id=\"T_d9e09_row5_col2\" class=\"data row5 col2\" >0.031960</td>\n",
       "      <td id=\"T_d9e09_row5_col3\" class=\"data row5 col3\" >0.041206</td>\n",
       "      <td id=\"T_d9e09_row5_col4\" class=\"data row5 col4\" >0.162535</td>\n",
       "      <td id=\"T_d9e09_row5_col5\" class=\"data row5 col5\" >0.049221</td>\n",
       "      <td id=\"T_d9e09_row5_col6\" class=\"data row5 col6\" >0.167154</td>\n",
       "      <td id=\"T_d9e09_row5_col7\" class=\"data row5 col7\" >0.009680</td>\n",
       "      <td id=\"T_d9e09_row5_col8\" class=\"data row5 col8\" >0.091974</td>\n",
       "      <td id=\"T_d9e09_row5_col9\" class=\"data row5 col9\" >0.037068</td>\n",
       "      <td id=\"T_d9e09_row5_col10\" class=\"data row5 col10\" >0.006908</td>\n",
       "      <td id=\"T_d9e09_row5_col11\" class=\"data row5 col11\" >0.001144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row6\" class=\"row_heading level0 row6\" >▁embar</th>\n",
       "      <td id=\"T_d9e09_row6_col0\" class=\"data row6 col0\" >0.023746</td>\n",
       "      <td id=\"T_d9e09_row6_col1\" class=\"data row6 col1\" >0.006623</td>\n",
       "      <td id=\"T_d9e09_row6_col2\" class=\"data row6 col2\" >0.022936</td>\n",
       "      <td id=\"T_d9e09_row6_col3\" class=\"data row6 col3\" >0.024855</td>\n",
       "      <td id=\"T_d9e09_row6_col4\" class=\"data row6 col4\" >0.112323</td>\n",
       "      <td id=\"T_d9e09_row6_col5\" class=\"data row6 col5\" >0.109145</td>\n",
       "      <td id=\"T_d9e09_row6_col6\" class=\"data row6 col6\" >0.401772</td>\n",
       "      <td id=\"T_d9e09_row6_col7\" class=\"data row6 col7\" >0.005849</td>\n",
       "      <td id=\"T_d9e09_row6_col8\" class=\"data row6 col8\" >0.041486</td>\n",
       "      <td id=\"T_d9e09_row6_col9\" class=\"data row6 col9\" >0.018517</td>\n",
       "      <td id=\"T_d9e09_row6_col10\" class=\"data row6 col10\" >0.005279</td>\n",
       "      <td id=\"T_d9e09_row6_col11\" class=\"data row6 col11\" >0.000874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row7\" class=\"row_heading level0 row7\" >r</th>\n",
       "      <td id=\"T_d9e09_row7_col0\" class=\"data row7 col0\" >0.011822</td>\n",
       "      <td id=\"T_d9e09_row7_col1\" class=\"data row7 col1\" >0.003594</td>\n",
       "      <td id=\"T_d9e09_row7_col2\" class=\"data row7 col2\" >0.009987</td>\n",
       "      <td id=\"T_d9e09_row7_col3\" class=\"data row7 col3\" >0.009004</td>\n",
       "      <td id=\"T_d9e09_row7_col4\" class=\"data row7 col4\" >0.036264</td>\n",
       "      <td id=\"T_d9e09_row7_col5\" class=\"data row7 col5\" >0.056879</td>\n",
       "      <td id=\"T_d9e09_row7_col6\" class=\"data row7 col6\" >0.237397</td>\n",
       "      <td id=\"T_d9e09_row7_col7\" class=\"data row7 col7\" >0.002786</td>\n",
       "      <td id=\"T_d9e09_row7_col8\" class=\"data row7 col8\" >0.013612</td>\n",
       "      <td id=\"T_d9e09_row7_col9\" class=\"data row7 col9\" >0.007376</td>\n",
       "      <td id=\"T_d9e09_row7_col10\" class=\"data row7 col10\" >0.001992</td>\n",
       "      <td id=\"T_d9e09_row7_col11\" class=\"data row7 col11\" >0.000290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row8\" class=\"row_heading level0 row8\" >ass</th>\n",
       "      <td id=\"T_d9e09_row8_col0\" class=\"data row8 col0\" >0.010152</td>\n",
       "      <td id=\"T_d9e09_row8_col1\" class=\"data row8 col1\" >0.003053</td>\n",
       "      <td id=\"T_d9e09_row8_col2\" class=\"data row8 col2\" >0.008315</td>\n",
       "      <td id=\"T_d9e09_row8_col3\" class=\"data row8 col3\" >0.007592</td>\n",
       "      <td id=\"T_d9e09_row8_col4\" class=\"data row8 col4\" >0.026204</td>\n",
       "      <td id=\"T_d9e09_row8_col5\" class=\"data row8 col5\" >0.036452</td>\n",
       "      <td id=\"T_d9e09_row8_col6\" class=\"data row8 col6\" >0.145966</td>\n",
       "      <td id=\"T_d9e09_row8_col7\" class=\"data row8 col7\" >0.002191</td>\n",
       "      <td id=\"T_d9e09_row8_col8\" class=\"data row8 col8\" >0.011672</td>\n",
       "      <td id=\"T_d9e09_row8_col9\" class=\"data row8 col9\" >0.006172</td>\n",
       "      <td id=\"T_d9e09_row8_col10\" class=\"data row8 col10\" >0.001527</td>\n",
       "      <td id=\"T_d9e09_row8_col11\" class=\"data row8 col11\" >0.000253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row9\" class=\"row_heading level0 row9\" >ment</th>\n",
       "      <td id=\"T_d9e09_row9_col0\" class=\"data row9 col0\" >0.011504</td>\n",
       "      <td id=\"T_d9e09_row9_col1\" class=\"data row9 col1\" >0.003347</td>\n",
       "      <td id=\"T_d9e09_row9_col2\" class=\"data row9 col2\" >0.010321</td>\n",
       "      <td id=\"T_d9e09_row9_col3\" class=\"data row9 col3\" >0.009686</td>\n",
       "      <td id=\"T_d9e09_row9_col4\" class=\"data row9 col4\" >0.038061</td>\n",
       "      <td id=\"T_d9e09_row9_col5\" class=\"data row9 col5\" >0.046663</td>\n",
       "      <td id=\"T_d9e09_row9_col6\" class=\"data row9 col6\" >0.196906</td>\n",
       "      <td id=\"T_d9e09_row9_col7\" class=\"data row9 col7\" >0.002677</td>\n",
       "      <td id=\"T_d9e09_row9_col8\" class=\"data row9 col8\" >0.015470</td>\n",
       "      <td id=\"T_d9e09_row9_col9\" class=\"data row9 col9\" >0.007741</td>\n",
       "      <td id=\"T_d9e09_row9_col10\" class=\"data row9 col10\" >0.002025</td>\n",
       "      <td id=\"T_d9e09_row9_col11\" class=\"data row9 col11\" >0.000322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row10\" class=\"row_heading level0 row10\" >.</th>\n",
       "      <td id=\"T_d9e09_row10_col0\" class=\"data row10 col0\" >0.034624</td>\n",
       "      <td id=\"T_d9e09_row10_col1\" class=\"data row10 col1\" >0.010515</td>\n",
       "      <td id=\"T_d9e09_row10_col2\" class=\"data row10 col2\" >0.031751</td>\n",
       "      <td id=\"T_d9e09_row10_col3\" class=\"data row10 col3\" >0.029445</td>\n",
       "      <td id=\"T_d9e09_row10_col4\" class=\"data row10 col4\" >0.056192</td>\n",
       "      <td id=\"T_d9e09_row10_col5\" class=\"data row10 col5\" >0.051430</td>\n",
       "      <td id=\"T_d9e09_row10_col6\" class=\"data row10 col6\" >0.186680</td>\n",
       "      <td id=\"T_d9e09_row10_col7\" class=\"data row10 col7\" >0.005912</td>\n",
       "      <td id=\"T_d9e09_row10_col8\" class=\"data row10 col8\" >0.039982</td>\n",
       "      <td id=\"T_d9e09_row10_col9\" class=\"data row10 col9\" >0.019408</td>\n",
       "      <td id=\"T_d9e09_row10_col10\" class=\"data row10 col10\" >0.008137</td>\n",
       "      <td id=\"T_d9e09_row10_col11\" class=\"data row10 col11\" >0.002545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_d9e09_level0_row11\" class=\"row_heading level0 row11\" ></s></th>\n",
       "      <td id=\"T_d9e09_row11_col0\" class=\"data row11 col0\" >0.042708</td>\n",
       "      <td id=\"T_d9e09_row11_col1\" class=\"data row11 col1\" >0.011732</td>\n",
       "      <td id=\"T_d9e09_row11_col2\" class=\"data row11 col2\" >0.042177</td>\n",
       "      <td id=\"T_d9e09_row11_col3\" class=\"data row11 col3\" >0.031944</td>\n",
       "      <td id=\"T_d9e09_row11_col4\" class=\"data row11 col4\" >0.028125</td>\n",
       "      <td id=\"T_d9e09_row11_col5\" class=\"data row11 col5\" >0.011670</td>\n",
       "      <td id=\"T_d9e09_row11_col6\" class=\"data row11 col6\" >0.034999</td>\n",
       "      <td id=\"T_d9e09_row11_col7\" class=\"data row11 col7\" >0.004519</td>\n",
       "      <td id=\"T_d9e09_row11_col8\" class=\"data row11 col8\" >0.024745</td>\n",
       "      <td id=\"T_d9e09_row11_col9\" class=\"data row11 col9\" >0.014020</td>\n",
       "      <td id=\"T_d9e09_row11_col10\" class=\"data row11 col10\" >0.003402</td>\n",
       "      <td id=\"T_d9e09_row11_col11\" class=\"data row11 col11\" >0.000897</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7fc655d8df70>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alti_matrix, source_sentence, target_sentence, predicted_sentence = computed_alti[2]\n",
    "pd.DataFrame(\n",
    "    alti_matrix[:, :len(source_sentence)], \n",
    "    index=predicted_sentence, columns=source_sentence,\n",
    ").style.background_gradient()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "0ca6269d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7bf5cdcf0d884bdcacc8fc48d96bd0ce",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/3415 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "alti_token_metrics = pd.DataFrame([compute_alti_metrics(*row) for row in tqdm(computed_alti)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "48df9c87",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((3415, 18), (3415, 13))"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alti_token_metrics.shape, gt.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "203b41ea",
   "metadata": {},
   "source": [
    "### Log probablity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "e88c3cf0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "04d5303a356147b5bf6ce79d63f745ef",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/3415 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "mt_losses = [get_loss(hub, row.src, row.mt) for i, row in tqdm(gt.iterrows(), total=gt.shape[0])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "10ffe8dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "hub.to('cpu');\n",
    "cleanup();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63a19303",
   "metadata": {},
   "source": [
    "### Comet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5591b54e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:37:09 | INFO | comet.download_utils | wmt20-comet-qe-da-v2 is already in cache.\n",
      "Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaModel: ['lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.bias']\n",
      "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "2022-12-19 07:37:18 | INFO | comet.models.base | Encoder model frozen.\n"
     ]
    }
   ],
   "source": [
    "model_path = comet.download_model(\"wmt20-comet-qe-da-v2\")\n",
    "model = comet.load_from_checkpoint(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "0233f6b2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:39:52 | INFO | pytorch_lightning.utilities.rank_zero | GPU available: True, used: True\n",
      "2022-12-19 07:39:52 | INFO | pytorch_lightning.utilities.rank_zero | TPU available: False, using: 0 TPU cores\n",
      "2022-12-19 07:39:52 | INFO | pytorch_lightning.utilities.rank_zero | IPU available: False, using: 0 IPUs\n",
      "2022-12-19 07:39:52 | INFO | pytorch_lightning.utilities.rank_zero | HPU available: False, using: 0 HPUs\n",
      "2022-12-19 07:39:53 | INFO | pytorch_lightning.accelerators.gpu | LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n",
      "Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 427/427 [00:28<00:00, 14.85it/s]\n"
     ]
    }
   ],
   "source": [
    "seg_scores_comet, sys_score_comet = model.predict(gt[['src', 'mt']].to_dict('records'), batch_size=8, gpus=int(USE_GPU))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c6997a4",
   "metadata": {},
   "source": [
    "#### comet with reference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "0bfc0c0b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:40:24 | INFO | comet.download_utils | wmt20-comet-da is already in cache.\n",
      "Some weights of the model checkpoint at xlm-roberta-large were not used when initializing XLMRobertaModel: ['lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'roberta.pooler.dense.bias']\n",
      "- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "2022-12-19 07:40:31 | INFO | comet.models.base | Encoder model frozen.\n"
     ]
    }
   ],
   "source": [
    "model_path = comet.download_model(\"wmt20-comet-da\")\n",
    "model = comet.load_from_checkpoint(model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a1f7c631",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:40:32 | INFO | pytorch_lightning.utilities.rank_zero | GPU available: True, used: True\n",
      "2022-12-19 07:40:32 | INFO | pytorch_lightning.utilities.rank_zero | TPU available: False, using: 0 TPU cores\n",
      "2022-12-19 07:40:32 | INFO | pytorch_lightning.utilities.rank_zero | IPU available: False, using: 0 IPUs\n",
      "2022-12-19 07:40:32 | INFO | pytorch_lightning.utilities.rank_zero | HPU available: False, using: 0 HPUs\n",
      "2022-12-19 07:40:32 | INFO | pytorch_lightning.accelerators.gpu | LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n",
      "Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 427/427 [00:42<00:00, 10.16it/s]\n"
     ]
    }
   ],
   "source": [
    "seg_scores_comet_ref, sys_score_comet_ref = model.predict(\n",
    "    gt[['src', 'mt', 'ref']].to_dict('records'), batch_size=8, gpus=int(USE_GPU)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "762d5f61",
   "metadata": {},
   "source": [
    "### LASER"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "d9b3cdd0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spm_tokenizer = spm.SentencePieceProcessor()\n",
    "spm_tokenizer.Load(LASER_DIR + 'laser2.spm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "5ffb3d6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "laser_encoder = SentenceEncoder(\n",
    "    LASER_DIR + 'laser2.pt',\n",
    "    max_sentences=None,\n",
    "    max_tokens=None,\n",
    "    spm_vocab=LASER_DIR + 'laser2.pt',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "64172384",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_sents(sents):\n",
    "    tokenized_sents = [\n",
    "        \" \".join(spm_tokenizer.EncodeAsPieces(sent))\n",
    "        for sent in sents\n",
    "    ]\n",
    "    emb = laser_encoder.encode_sentences(tokenized_sents)\n",
    "    return emb / ((emb**2).sum(1, keepdims=True) ** 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "2ece90bc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 18.9 s, sys: 36.1 ms, total: 18.9 s\n",
      "Wall time: 18.9 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "emb_src = encode_sents(gt.src.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "9927fbda",
   "metadata": {},
   "outputs": [],
   "source": [
    "emb_mt = encode_sents(gt.mt.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "155563be",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3415,)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "laser_sims = (emb_src * emb_mt).sum(1)\n",
    "laser_sims.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8144601",
   "metadata": {},
   "source": [
    "### LaBSE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "c2784500",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install sentence_transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "d02ce7e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-19 07:43:43 | INFO | sentence_transformers.SentenceTransformer | Load pretrained SentenceTransformer: sentence-transformers/LaBSE\n",
      "2022-12-19 07:43:48 | INFO | sentence_transformers.SentenceTransformer | Use pytorch device: cuda\n"
     ]
    }
   ],
   "source": [
    "labse = SentenceTransformer('sentence-transformers/LaBSE')\n",
    "if USE_GPU:\n",
    "    labse.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "e3a56e5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aab86a5c31a94404b594fecd1d509195",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/107 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "labse_emb_src = labse.encode(gt.src.tolist(), show_progress_bar=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "81774814",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6fdf4c0648364966b17a89d48d8f12ff",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/107 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "labse_emb_mt = labse.encode(gt.mt.tolist(), show_progress_bar=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "5022f086",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3415,)"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labse_sims = (labse_emb_src * labse_emb_mt).sum(1)\n",
    "labse_sims.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "a132c29a",
   "metadata": {},
   "outputs": [],
   "source": [
    "labse.to('cpu');"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2292aec8",
   "metadata": {},
   "source": [
    "### NLI models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "271f9916",
   "metadata": {},
   "outputs": [],
   "source": [
    "nli_models = [\n",
    "    'joeddav/xlm-roberta-large-xnli',   # use only the first one\n",
    "    #'MoritzLaurer/mDeBERTa-v3-base-mnli-xnli', \n",
    "    #'vicgalle/xlm-roberta-large-xnli-anli',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "1a2f4c8d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "joeddav/xlm-roberta-large-xnli\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n",
      "- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "482940e8c02041348c1fa2fac3ca5759",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/107 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cfa23eba6726476dbee98d3d5b3945bb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/107 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "batch_size = 32\n",
    "nli_scores = {}\n",
    "\n",
    "for mname in nli_models:\n",
    "    print(mname)\n",
    "    model = AutoModelForSequenceClassification.from_pretrained(mname)\n",
    "    if USE_GPU:\n",
    "        model.cuda()\n",
    "    cleanup()\n",
    "    tokenizer = AutoTokenizer.from_pretrained(mname)\n",
    "    # forward scores\n",
    "    scores = []\n",
    "    for i in trange(0, gt.shape[0], batch_size):\n",
    "        b = gt[i:i+batch_size]\n",
    "        with torch.inference_mode():\n",
    "            inputs = tokenizer(b.src.tolist(), b.mt.tolist(), truncation=True, padding=True, return_tensors='pt').to(model.device)\n",
    "            proba = torch.softmax(model(**inputs).logits, -1)[:, model.config.label2id['entailment']].cpu().numpy()\n",
    "        scores.append(proba)\n",
    "    scores = np.concatenate(scores)\n",
    "    nli_scores[f'nli_f_{mname}'] = scores\n",
    "    \n",
    "    # backward scores\n",
    "    scores = []\n",
    "    for i in trange(0, gt.shape[0], batch_size):\n",
    "        b = gt[i:i+batch_size]\n",
    "        with torch.inference_mode():\n",
    "            inputs = tokenizer(b.mt.tolist(), b.src.tolist(), truncation=True, padding=True, return_tensors='pt').to(model.device)\n",
    "            proba = torch.softmax(model(**inputs).logits, -1)[:, model.config.label2id['entailment']].cpu().numpy()\n",
    "        scores.append(proba)\n",
    "    scores = np.concatenate(scores)\n",
    "    nli_scores[f'nli_b_{mname}'] = scores\n",
    "    \n",
    "    # their product\n",
    "    nli_scores[f'nli_bf_{mname}'] = nli_scores[f'nli_b_{mname}'] * nli_scores[f'nli_f_{mname}']\n",
    "\n",
    "model.to('cpu')\n",
    "cleanup()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d4b6285",
   "metadata": {},
   "source": [
    "### ChrF++"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "7be52f05",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8f067970e097471e97e2bf13e2c499cd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/3415 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "chrfpp = CHRF(word_order=2)\n",
    "\n",
    "ref_chrfpp = [\n",
    "    chrfpp.sentence_score(row.mt, [row.ref]).score\n",
    "    for i, row in tqdm(gt.iterrows(), total=gt.shape[0])\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea887058",
   "metadata": {},
   "source": [
    "# Save the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "daa84c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_metrics = pd.concat([\n",
    "    pd.DataFrame(mt_losses).add_prefix('mt_log_'),\n",
    "    pd.DataFrame({\n",
    "        'comet_qa_neg': 1 - np.array(seg_scores_comet), \n",
    "        'comet_ref_neg': 1 - np.array(seg_scores_comet_ref),\n",
    "        'ref_chrfpp_neg': -np.array(ref_chrfpp),\n",
    "        'laser_sim': -laser_sims, \n",
    "        'labse_sim': -labse_sims, \n",
    "    }),\n",
    "    -pd.DataFrame(nli_scores),\n",
    "    -alti_token_metrics.add_prefix('alti_t_'),\n",
    "], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "52743d54",
   "metadata": {},
   "outputs": [],
   "source": [
    "detection_data = pd.concat([gt, all_metrics], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "10aa259a",
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs('../computed_data', exist_ok=True)\n",
    "detection_data.to_csv('../computed_data/detection_metrics.tsv', sep='\\t', index=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "78e1d547",
   "metadata": {},
   "source": [
    "# Evaluate the metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "a7f353df",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_columns = ['any_mistake', 'repeat_or_detached', 'any_detached', 'full-unsupport']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "6c4d7aec",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.max_rows = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "024e4d7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>any_mistake</th>\n",
       "      <th>repeat_or_detached</th>\n",
       "      <th>any_detached</th>\n",
       "      <th>full-unsupport</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>labse_sim</th>\n",
       "      <td>0.759625</td>\n",
       "      <td>0.917212</td>\n",
       "      <td>0.942578</td>\n",
       "      <td>0.984730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_bf_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.728699</td>\n",
       "      <td>0.909248</td>\n",
       "      <td>0.932571</td>\n",
       "      <td>0.986676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_b_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.725535</td>\n",
       "      <td>0.899990</td>\n",
       "      <td>0.924351</td>\n",
       "      <td>0.971583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_f_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.661679</td>\n",
       "      <td>0.880811</td>\n",
       "      <td>0.905899</td>\n",
       "      <td>0.985456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_top_sc_mean</th>\n",
       "      <td>0.694632</td>\n",
       "      <td>0.865731</td>\n",
       "      <td>0.874418</td>\n",
       "      <td>0.972283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc</th>\n",
       "      <td>0.616799</td>\n",
       "      <td>0.849173</td>\n",
       "      <td>0.870443</td>\n",
       "      <td>0.986556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_50</th>\n",
       "      <td>0.618442</td>\n",
       "      <td>0.847574</td>\n",
       "      <td>0.871220</td>\n",
       "      <td>0.983288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc_wo_lang</th>\n",
       "      <td>0.594139</td>\n",
       "      <td>0.847189</td>\n",
       "      <td>0.873031</td>\n",
       "      <td>0.987469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc_wo_eos</th>\n",
       "      <td>0.600112</td>\n",
       "      <td>0.844565</td>\n",
       "      <td>0.867781</td>\n",
       "      <td>0.986171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>comet_ref_neg</th>\n",
       "      <td>0.777755</td>\n",
       "      <td>0.834186</td>\n",
       "      <td>0.840474</td>\n",
       "      <td>0.877104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mt_log_loss_avg</th>\n",
       "      <td>0.743464</td>\n",
       "      <td>0.829836</td>\n",
       "      <td>0.842439</td>\n",
       "      <td>0.934823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_40</th>\n",
       "      <td>0.614518</td>\n",
       "      <td>0.816800</td>\n",
       "      <td>0.841160</td>\n",
       "      <td>0.977819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>laser_sim</th>\n",
       "      <td>0.604820</td>\n",
       "      <td>0.793455</td>\n",
       "      <td>0.820557</td>\n",
       "      <td>0.912202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_mean</th>\n",
       "      <td>0.670132</td>\n",
       "      <td>0.784092</td>\n",
       "      <td>0.816233</td>\n",
       "      <td>0.939808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_30</th>\n",
       "      <td>0.582954</td>\n",
       "      <td>0.765641</td>\n",
       "      <td>0.792552</td>\n",
       "      <td>0.942783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mt_log_loss_sum</th>\n",
       "      <td>0.751012</td>\n",
       "      <td>0.760552</td>\n",
       "      <td>0.750690</td>\n",
       "      <td>0.766685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ref_chrfpp_neg</th>\n",
       "      <td>0.630768</td>\n",
       "      <td>0.754332</td>\n",
       "      <td>0.778798</td>\n",
       "      <td>0.895462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_min_sc</th>\n",
       "      <td>0.630937</td>\n",
       "      <td>0.751457</td>\n",
       "      <td>0.768716</td>\n",
       "      <td>0.907137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_max_contr_min</th>\n",
       "      <td>0.706828</td>\n",
       "      <td>0.748315</td>\n",
       "      <td>0.745604</td>\n",
       "      <td>0.807702</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_top_sc_min</th>\n",
       "      <td>0.691541</td>\n",
       "      <td>0.724673</td>\n",
       "      <td>0.718785</td>\n",
       "      <td>0.785793</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_min</th>\n",
       "      <td>0.686339</td>\n",
       "      <td>0.718703</td>\n",
       "      <td>0.724619</td>\n",
       "      <td>0.815369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>comet_qa_neg</th>\n",
       "      <td>0.772697</td>\n",
       "      <td>0.701680</td>\n",
       "      <td>0.694369</td>\n",
       "      <td>0.660967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_20</th>\n",
       "      <td>0.520161</td>\n",
       "      <td>0.555805</td>\n",
       "      <td>0.560369</td>\n",
       "      <td>0.581511</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_10</th>\n",
       "      <td>0.500366</td>\n",
       "      <td>0.499838</td>\n",
       "      <td>0.499840</td>\n",
       "      <td>0.499848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_entropy</th>\n",
       "      <td>0.347651</td>\n",
       "      <td>0.426520</td>\n",
       "      <td>0.453384</td>\n",
       "      <td>0.512550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_share_wo_eos</th>\n",
       "      <td>0.319496</td>\n",
       "      <td>0.386329</td>\n",
       "      <td>0.408270</td>\n",
       "      <td>0.432695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_below_01</th>\n",
       "      <td>0.404855</td>\n",
       "      <td>0.277920</td>\n",
       "      <td>0.246620</td>\n",
       "      <td>0.129051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_max_contr_below_001</th>\n",
       "      <td>0.326026</td>\n",
       "      <td>0.239501</td>\n",
       "      <td>0.229644</td>\n",
       "      <td>0.167126</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       any_mistake  repeat_or_detached  \\\n",
       "labse_sim                                 0.759625            0.917212   \n",
       "nli_bf_joeddav/xlm-roberta-large-xnli     0.728699            0.909248   \n",
       "nli_b_joeddav/xlm-roberta-large-xnli      0.725535            0.899990   \n",
       "nli_f_joeddav/xlm-roberta-large-xnli      0.661679            0.880811   \n",
       "alti_t_top_sc_mean                        0.694632            0.865731   \n",
       "alti_t_avg_sc                             0.616799            0.849173   \n",
       "alti_t_sc_above_50                        0.618442            0.847574   \n",
       "alti_t_avg_sc_wo_lang                     0.594139            0.847189   \n",
       "alti_t_avg_sc_wo_eos                      0.600112            0.844565   \n",
       "comet_ref_neg                             0.777755            0.834186   \n",
       "mt_log_loss_avg                           0.743464            0.829836   \n",
       "alti_t_sc_above_40                        0.614518            0.816800   \n",
       "laser_sim                                 0.604820            0.793455   \n",
       "alti_t_src_sum_contr_mean                 0.670132            0.784092   \n",
       "alti_t_sc_above_30                        0.582954            0.765641   \n",
       "mt_log_loss_sum                           0.751012            0.760552   \n",
       "ref_chrfpp_neg                            0.630768            0.754332   \n",
       "alti_t_min_sc                             0.630937            0.751457   \n",
       "alti_t_src_max_contr_min                  0.706828            0.748315   \n",
       "alti_t_top_sc_min                         0.691541            0.724673   \n",
       "alti_t_src_sum_contr_min                  0.686339            0.718703   \n",
       "comet_qa_neg                              0.772697            0.701680   \n",
       "alti_t_sc_above_20                        0.520161            0.555805   \n",
       "alti_t_sc_above_10                        0.500366            0.499838   \n",
       "alti_t_sc_entropy                         0.347651            0.426520   \n",
       "alti_t_sc_share_wo_eos                    0.319496            0.386329   \n",
       "alti_t_src_sum_contr_below_01             0.404855            0.277920   \n",
       "alti_t_src_max_contr_below_001            0.326026            0.239501   \n",
       "\n",
       "                                       any_detached  full-unsupport  \n",
       "labse_sim                                  0.942578        0.984730  \n",
       "nli_bf_joeddav/xlm-roberta-large-xnli      0.932571        0.986676  \n",
       "nli_b_joeddav/xlm-roberta-large-xnli       0.924351        0.971583  \n",
       "nli_f_joeddav/xlm-roberta-large-xnli       0.905899        0.985456  \n",
       "alti_t_top_sc_mean                         0.874418        0.972283  \n",
       "alti_t_avg_sc                              0.870443        0.986556  \n",
       "alti_t_sc_above_50                         0.871220        0.983288  \n",
       "alti_t_avg_sc_wo_lang                      0.873031        0.987469  \n",
       "alti_t_avg_sc_wo_eos                       0.867781        0.986171  \n",
       "comet_ref_neg                              0.840474        0.877104  \n",
       "mt_log_loss_avg                            0.842439        0.934823  \n",
       "alti_t_sc_above_40                         0.841160        0.977819  \n",
       "laser_sim                                  0.820557        0.912202  \n",
       "alti_t_src_sum_contr_mean                  0.816233        0.939808  \n",
       "alti_t_sc_above_30                         0.792552        0.942783  \n",
       "mt_log_loss_sum                            0.750690        0.766685  \n",
       "ref_chrfpp_neg                             0.778798        0.895462  \n",
       "alti_t_min_sc                              0.768716        0.907137  \n",
       "alti_t_src_max_contr_min                   0.745604        0.807702  \n",
       "alti_t_top_sc_min                          0.718785        0.785793  \n",
       "alti_t_src_sum_contr_min                   0.724619        0.815369  \n",
       "comet_qa_neg                               0.694369        0.660967  \n",
       "alti_t_sc_above_20                         0.560369        0.581511  \n",
       "alti_t_sc_above_10                         0.499840        0.499848  \n",
       "alti_t_sc_entropy                          0.453384        0.512550  \n",
       "alti_t_sc_share_wo_eos                     0.408270        0.432695  \n",
       "alti_t_src_sum_contr_below_01              0.246620        0.129051  \n",
       "alti_t_src_max_contr_below_001             0.229644        0.167126  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aucs = pd.DataFrame({\n",
    "    target: {pred: roc_auc_score(gt[target], all_metrics[pred]) for pred in all_metrics.columns} \n",
    "    for target in target_columns\n",
    "})\n",
    "aucs.sort_values('repeat_or_detached', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "32541b5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>any_mistake</th>\n",
       "      <th>repeat_or_detached</th>\n",
       "      <th>any_detached</th>\n",
       "      <th>full-unsupport</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>labse_sim</th>\n",
       "      <td>0.440652</td>\n",
       "      <td>0.422939</td>\n",
       "      <td>0.429378</td>\n",
       "      <td>0.320131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_bf_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.388162</td>\n",
       "      <td>0.414866</td>\n",
       "      <td>0.419670</td>\n",
       "      <td>0.321417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_b_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.382793</td>\n",
       "      <td>0.405481</td>\n",
       "      <td>0.411695</td>\n",
       "      <td>0.311449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nli_f_joeddav/xlm-roberta-large-xnli</th>\n",
       "      <td>0.274413</td>\n",
       "      <td>0.386038</td>\n",
       "      <td>0.393793</td>\n",
       "      <td>0.320611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_top_sc_mean</th>\n",
       "      <td>0.330342</td>\n",
       "      <td>0.370751</td>\n",
       "      <td>0.363251</td>\n",
       "      <td>0.311911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc</th>\n",
       "      <td>0.198239</td>\n",
       "      <td>0.353966</td>\n",
       "      <td>0.359395</td>\n",
       "      <td>0.321337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_50</th>\n",
       "      <td>0.201429</td>\n",
       "      <td>0.353050</td>\n",
       "      <td>0.360869</td>\n",
       "      <td>0.319818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc_wo_lang</th>\n",
       "      <td>0.159779</td>\n",
       "      <td>0.351955</td>\n",
       "      <td>0.361906</td>\n",
       "      <td>0.321940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_avg_sc_wo_eos</th>\n",
       "      <td>0.169916</td>\n",
       "      <td>0.349295</td>\n",
       "      <td>0.356812</td>\n",
       "      <td>0.321083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>comet_ref_neg</th>\n",
       "      <td>0.471423</td>\n",
       "      <td>0.338773</td>\n",
       "      <td>0.330319</td>\n",
       "      <td>0.249052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mt_log_loss_avg</th>\n",
       "      <td>0.413222</td>\n",
       "      <td>0.334363</td>\n",
       "      <td>0.332226</td>\n",
       "      <td>0.287172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_40</th>\n",
       "      <td>0.198400</td>\n",
       "      <td>0.327812</td>\n",
       "      <td>0.337852</td>\n",
       "      <td>0.322115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_30</th>\n",
       "      <td>0.163537</td>\n",
       "      <td>0.312786</td>\n",
       "      <td>0.329675</td>\n",
       "      <td>0.339665</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>laser_sim</th>\n",
       "      <td>0.177908</td>\n",
       "      <td>0.297483</td>\n",
       "      <td>0.310996</td>\n",
       "      <td>0.272232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_mean</th>\n",
       "      <td>0.288759</td>\n",
       "      <td>0.287992</td>\n",
       "      <td>0.306802</td>\n",
       "      <td>0.290464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mt_log_loss_sum</th>\n",
       "      <td>0.426034</td>\n",
       "      <td>0.264128</td>\n",
       "      <td>0.243214</td>\n",
       "      <td>0.176127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ref_chrfpp_neg</th>\n",
       "      <td>0.221981</td>\n",
       "      <td>0.257862</td>\n",
       "      <td>0.270524</td>\n",
       "      <td>0.261216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_min_sc</th>\n",
       "      <td>0.222234</td>\n",
       "      <td>0.254909</td>\n",
       "      <td>0.260702</td>\n",
       "      <td>0.268887</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_max_contr_min</th>\n",
       "      <td>0.351042</td>\n",
       "      <td>0.251723</td>\n",
       "      <td>0.238279</td>\n",
       "      <td>0.203216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_top_sc_min</th>\n",
       "      <td>0.325096</td>\n",
       "      <td>0.227758</td>\n",
       "      <td>0.212260</td>\n",
       "      <td>0.188747</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_min</th>\n",
       "      <td>0.316267</td>\n",
       "      <td>0.221705</td>\n",
       "      <td>0.217919</td>\n",
       "      <td>0.208280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_20</th>\n",
       "      <td>0.130607</td>\n",
       "      <td>0.215918</td>\n",
       "      <td>0.223542</td>\n",
       "      <td>0.205466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>comet_qa_neg</th>\n",
       "      <td>0.462838</td>\n",
       "      <td>0.204448</td>\n",
       "      <td>0.188572</td>\n",
       "      <td>0.106308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_above_10</th>\n",
       "      <td>0.020948</td>\n",
       "      <td>-0.005532</td>\n",
       "      <td>-0.005243</td>\n",
       "      <td>-0.003391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_entropy</th>\n",
       "      <td>-0.258576</td>\n",
       "      <td>-0.074489</td>\n",
       "      <td>-0.045226</td>\n",
       "      <td>0.008289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_sc_share_wo_eos</th>\n",
       "      <td>-0.306363</td>\n",
       "      <td>-0.115231</td>\n",
       "      <td>-0.088995</td>\n",
       "      <td>-0.044450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_sum_contr_below_01</th>\n",
       "      <td>-0.161938</td>\n",
       "      <td>-0.225757</td>\n",
       "      <td>-0.246510</td>\n",
       "      <td>-0.245672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alti_t_src_max_contr_below_001</th>\n",
       "      <td>-0.307457</td>\n",
       "      <td>-0.274965</td>\n",
       "      <td>-0.273110</td>\n",
       "      <td>-0.228907</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       any_mistake  repeat_or_detached  \\\n",
       "labse_sim                                 0.440652            0.422939   \n",
       "nli_bf_joeddav/xlm-roberta-large-xnli     0.388162            0.414866   \n",
       "nli_b_joeddav/xlm-roberta-large-xnli      0.382793            0.405481   \n",
       "nli_f_joeddav/xlm-roberta-large-xnli      0.274413            0.386038   \n",
       "alti_t_top_sc_mean                        0.330342            0.370751   \n",
       "alti_t_avg_sc                             0.198239            0.353966   \n",
       "alti_t_sc_above_50                        0.201429            0.353050   \n",
       "alti_t_avg_sc_wo_lang                     0.159779            0.351955   \n",
       "alti_t_avg_sc_wo_eos                      0.169916            0.349295   \n",
       "comet_ref_neg                             0.471423            0.338773   \n",
       "mt_log_loss_avg                           0.413222            0.334363   \n",
       "alti_t_sc_above_40                        0.198400            0.327812   \n",
       "alti_t_sc_above_30                        0.163537            0.312786   \n",
       "laser_sim                                 0.177908            0.297483   \n",
       "alti_t_src_sum_contr_mean                 0.288759            0.287992   \n",
       "mt_log_loss_sum                           0.426034            0.264128   \n",
       "ref_chrfpp_neg                            0.221981            0.257862   \n",
       "alti_t_min_sc                             0.222234            0.254909   \n",
       "alti_t_src_max_contr_min                  0.351042            0.251723   \n",
       "alti_t_top_sc_min                         0.325096            0.227758   \n",
       "alti_t_src_sum_contr_min                  0.316267            0.221705   \n",
       "alti_t_sc_above_20                        0.130607            0.215918   \n",
       "comet_qa_neg                              0.462838            0.204448   \n",
       "alti_t_sc_above_10                        0.020948           -0.005532   \n",
       "alti_t_sc_entropy                        -0.258576           -0.074489   \n",
       "alti_t_sc_share_wo_eos                   -0.306363           -0.115231   \n",
       "alti_t_src_sum_contr_below_01            -0.161938           -0.225757   \n",
       "alti_t_src_max_contr_below_001           -0.307457           -0.274965   \n",
       "\n",
       "                                       any_detached  full-unsupport  \n",
       "labse_sim                                  0.429378        0.320131  \n",
       "nli_bf_joeddav/xlm-roberta-large-xnli      0.419670        0.321417  \n",
       "nli_b_joeddav/xlm-roberta-large-xnli       0.411695        0.311449  \n",
       "nli_f_joeddav/xlm-roberta-large-xnli       0.393793        0.320611  \n",
       "alti_t_top_sc_mean                         0.363251        0.311911  \n",
       "alti_t_avg_sc                              0.359395        0.321337  \n",
       "alti_t_sc_above_50                         0.360869        0.319818  \n",
       "alti_t_avg_sc_wo_lang                      0.361906        0.321940  \n",
       "alti_t_avg_sc_wo_eos                       0.356812        0.321083  \n",
       "comet_ref_neg                              0.330319        0.249052  \n",
       "mt_log_loss_avg                            0.332226        0.287172  \n",
       "alti_t_sc_above_40                         0.337852        0.322115  \n",
       "alti_t_sc_above_30                         0.329675        0.339665  \n",
       "laser_sim                                  0.310996        0.272232  \n",
       "alti_t_src_sum_contr_mean                  0.306802        0.290464  \n",
       "mt_log_loss_sum                            0.243214        0.176127  \n",
       "ref_chrfpp_neg                             0.270524        0.261216  \n",
       "alti_t_min_sc                              0.260702        0.268887  \n",
       "alti_t_src_max_contr_min                   0.238279        0.203216  \n",
       "alti_t_top_sc_min                          0.212260        0.188747  \n",
       "alti_t_src_sum_contr_min                   0.217919        0.208280  \n",
       "alti_t_sc_above_20                         0.223542        0.205466  \n",
       "comet_qa_neg                               0.188572        0.106308  \n",
       "alti_t_sc_above_10                        -0.005243       -0.003391  \n",
       "alti_t_sc_entropy                         -0.045226        0.008289  \n",
       "alti_t_sc_share_wo_eos                    -0.088995       -0.044450  \n",
       "alti_t_src_sum_contr_below_01             -0.246510       -0.245672  \n",
       "alti_t_src_max_contr_below_001            -0.273110       -0.228907  "
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spearmans = pd.DataFrame({\n",
    "    target: {pred: spearmanr(gt[target], all_metrics[pred]).correlation for pred in all_metrics.columns} \n",
    "    for target in target_columns\n",
    "})\n",
    "spearmans.sort_values('repeat_or_detached', ascending=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fs-nllb-kernel",
   "language": "python",
   "name": "fs-nllb-kernel"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
